1   package org.apache.lucene.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.util.Arrays;
21  import java.util.List;
22  
23  import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
24  
25  /** 
26   * Class that Posting and PostingVector use to write byte
27   * streams into shared fixed-size byte[] arrays.  The idea
28   * is to allocate slices of increasing lengths For
29   * example, the first slice is 5 bytes, the next slice is
30   * 14, etc.  We start by writing our bytes into the first
31   * 5 bytes.  When we hit the end of the slice, we allocate
32   * the next slice and then write the address of the new
33   * slice into the last 4 bytes of the previous slice (the
34   * "forwarding address").
35   *
36   * Each slice is filled with 0's initially, and we mark
37   * the end with a non-zero byte.  This way the methods
38   * that are writing into the slice don't need to record
39   * its length and instead allocate a new slice once they
40   * hit a non-zero byte. 
41   * 
42   * @lucene.internal
43   **/
44  public final class ByteBlockPool {
45    public final static int BYTE_BLOCK_SHIFT = 15;
46    public final static int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
47    public final static int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;
48  
49    /** Abstract class for allocating and freeing byte
50     *  blocks. */
51    public abstract static class Allocator {
52      protected final int blockSize;
53  
54      public Allocator(int blockSize) {
55        this.blockSize = blockSize;
56      }
57  
58      public abstract void recycleByteBlocks(byte[][] blocks, int start, int end);
59  
60      public void recycleByteBlocks(List<byte[]> blocks) {
61        final byte[][] b = blocks.toArray(new byte[blocks.size()][]);
62        recycleByteBlocks(b, 0, b.length);
63      }
64  
65      public byte[] getByteBlock() {
66        return new byte[blockSize];
67      }
68    }
69    
70    /** A simple {@link Allocator} that never recycles. */
71    public static final class DirectAllocator extends Allocator {
72      
73      public DirectAllocator() {
74        this(BYTE_BLOCK_SIZE);
75      }
76  
77      public DirectAllocator(int blockSize) {
78        super(blockSize);
79      }
80  
81      @Override
82      public void recycleByteBlocks(byte[][] blocks, int start, int end) {
83      }
84    }
85    
86    /** A simple {@link Allocator} that never recycles, but
87     *  tracks how much total RAM is in use. */
88    public static class DirectTrackingAllocator extends Allocator {
89      private final Counter bytesUsed;
90      
91      public DirectTrackingAllocator(Counter bytesUsed) {
92        this(BYTE_BLOCK_SIZE, bytesUsed);
93      }
94  
95      public DirectTrackingAllocator(int blockSize, Counter bytesUsed) {
96        super(blockSize);
97        this.bytesUsed = bytesUsed;
98      }
99  
100     @Override
101     public byte[] getByteBlock() {
102       bytesUsed.addAndGet(blockSize);
103       return new byte[blockSize];
104     }
105 
106     @Override
107     public void recycleByteBlocks(byte[][] blocks, int start, int end) {
108       bytesUsed.addAndGet(-((end-start)* blockSize));
109       for (int i = start; i < end; i++) {
110         blocks[i] = null;
111       }
112     }
113   };
114 
115   /**
116    * array of buffers currently used in the pool. Buffers are allocated if
117    * needed don't modify this outside of this class.
118    */
119   public byte[][] buffers = new byte[10][];
120   
121   /** index into the buffers array pointing to the current buffer used as the head */
122   private int bufferUpto = -1;                        // Which buffer we are upto
123   /** Where we are in head buffer */
124   public int byteUpto = BYTE_BLOCK_SIZE;
125 
126   /** Current head buffer */
127   public byte[] buffer;
128   /** Current head offset */
129   public int byteOffset = -BYTE_BLOCK_SIZE;
130 
131   private final Allocator allocator;
132 
133   public ByteBlockPool(Allocator allocator) {
134     this.allocator = allocator;
135   }
136   
137   /**
138    * Resets the pool to its initial state reusing the first buffer and fills all
139    * buffers with <tt>0</tt> bytes before they reused or passed to
140    * {@link Allocator#recycleByteBlocks(byte[][], int, int)}. Calling
141    * {@link ByteBlockPool#nextBuffer()} is not needed after reset.
142    */
143   public void reset() {
144     reset(true, true);
145   }
146   
147   /**
148    * Expert: Resets the pool to its initial state reusing the first buffer. Calling
149    * {@link ByteBlockPool#nextBuffer()} is not needed after reset. 
150    * @param zeroFillBuffers if <code>true</code> the buffers are filled with <tt>0</tt>. 
151    *        This should be set to <code>true</code> if this pool is used with slices.
152    * @param reuseFirst if <code>true</code> the first buffer will be reused and calling
153    *        {@link ByteBlockPool#nextBuffer()} is not needed after reset iff the 
154    *        block pool was used before ie. {@link ByteBlockPool#nextBuffer()} was called before.
155    */
156   public void reset(boolean zeroFillBuffers, boolean reuseFirst) {
157     if (bufferUpto != -1) {
158       // We allocated at least one buffer
159 
160       if (zeroFillBuffers) {
161         for(int i=0;i<bufferUpto;i++) {
162           // Fully zero fill buffers that we fully used
163           Arrays.fill(buffers[i], (byte) 0);
164         }
165         // Partial zero fill the final buffer
166         Arrays.fill(buffers[bufferUpto], 0, byteUpto, (byte) 0);
167       }
168      
169      if (bufferUpto > 0 || !reuseFirst) {
170        final int offset = reuseFirst ? 1 : 0;  
171        // Recycle all but the first buffer
172        allocator.recycleByteBlocks(buffers, offset, 1+bufferUpto);
173        Arrays.fill(buffers, offset, 1+bufferUpto, null);
174      }
175      if (reuseFirst) {
176        // Re-use the first buffer
177        bufferUpto = 0;
178        byteUpto = 0;
179        byteOffset = 0;
180        buffer = buffers[0];
181      } else {
182        bufferUpto = -1;
183        byteUpto = BYTE_BLOCK_SIZE;
184        byteOffset = -BYTE_BLOCK_SIZE;
185        buffer = null;
186      }
187     }
188   }
189   /**
190    * Advances the pool to its next buffer. This method should be called once
191    * after the constructor to initialize the pool. In contrast to the
192    * constructor a {@link ByteBlockPool#reset()} call will advance the pool to
193    * its first buffer immediately.
194    */
195   public void nextBuffer() {
196     if (1+bufferUpto == buffers.length) {
197       byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length+1,
198                                                         NUM_BYTES_OBJECT_REF)][];
199       System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
200       buffers = newBuffers;
201     }
202     buffer = buffers[1+bufferUpto] = allocator.getByteBlock();
203     bufferUpto++;
204 
205     byteUpto = 0;
206     byteOffset += BYTE_BLOCK_SIZE;
207   }
208   
209   /**
210    * Allocates a new slice with the given size. 
211    * @see ByteBlockPool#FIRST_LEVEL_SIZE
212    */
213   public int newSlice(final int size) {
214     if (byteUpto > BYTE_BLOCK_SIZE-size)
215       nextBuffer();
216     final int upto = byteUpto;
217     byteUpto += size;
218     buffer[byteUpto-1] = 16;
219     return upto;
220   }
221 
222   // Size of each slice.  These arrays should be at most 16
223   // elements (index is encoded with 4 bits).  First array
224   // is just a compact way to encode X+1 with a max.  Second
225   // array is the length of each slice, ie first slice is 5
226   // bytes, next slice is 14 bytes, etc.
227   
228   /**
229    * An array holding the offset into the {@link ByteBlockPool#LEVEL_SIZE_ARRAY}
230    * to quickly navigate to the next slice level.
231    */
232   public final static int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
233   
234   /**
235    * An array holding the level sizes for byte slices.
236    */
237   public final static int[] LEVEL_SIZE_ARRAY = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200};
238   
239   /**
240    * The first level size for new slices
241    * @see ByteBlockPool#newSlice(int)
242    */
243   public final static int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
244 
245   /**
246    * Creates a new byte slice with the given starting size and 
247    * returns the slices offset in the pool.
248    */
249   public int allocSlice(final byte[] slice, final int upto) {
250 
251     final int level = slice[upto] & 15;
252     final int newLevel = NEXT_LEVEL_ARRAY[level];
253     final int newSize = LEVEL_SIZE_ARRAY[newLevel];
254 
255     // Maybe allocate another block
256     if (byteUpto > BYTE_BLOCK_SIZE-newSize) {
257       nextBuffer();
258     }
259 
260     final int newUpto = byteUpto;
261     final int offset = newUpto + byteOffset;
262     byteUpto += newSize;
263 
264     // Copy forward the past 3 bytes (which we are about
265     // to overwrite with the forwarding address):
266     buffer[newUpto] = slice[upto-3];
267     buffer[newUpto+1] = slice[upto-2];
268     buffer[newUpto+2] = slice[upto-1];
269 
270     // Write forwarding address at end of last slice:
271     slice[upto-3] = (byte) (offset >>> 24);
272     slice[upto-2] = (byte) (offset >>> 16);
273     slice[upto-1] = (byte) (offset >>> 8);
274     slice[upto] = (byte) offset;
275         
276     // Write new level:
277     buffer[byteUpto-1] = (byte) (16|newLevel);
278 
279     return newUpto+3;
280   }
281 
282   // Fill in a BytesRef from term's length & bytes encoded in
283   // byte block
284   public void setBytesRef(BytesRef term, int textStart) {
285     final byte[] bytes = term.bytes = buffers[textStart >> BYTE_BLOCK_SHIFT];
286     int pos = textStart & BYTE_BLOCK_MASK;
287     if ((bytes[pos] & 0x80) == 0) {
288       // length is 1 byte
289       term.length = bytes[pos];
290       term.offset = pos+1;
291     } else {
292       // length is 2 bytes
293       term.length = (bytes[pos]&0x7f) + ((bytes[pos+1]&0xff)<<7);
294       term.offset = pos+2;
295     }
296     assert term.length >= 0;
297   }
298   
299   /**
300    * Appends the bytes in the provided {@link BytesRef} at
301    * the current position.
302    */
303   public void append(final BytesRef bytes) {
304     int length = bytes.length;
305     if (length == 0) {
306       return;
307     }
308     int offset = bytes.offset;
309     int overflow = (length + byteUpto) - BYTE_BLOCK_SIZE;
310     do {
311       if (overflow <= 0) { 
312         System.arraycopy(bytes.bytes, offset, buffer, byteUpto, length);
313         byteUpto += length;
314         break;
315       } else {
316         final int bytesToCopy = length-overflow;
317         if (bytesToCopy > 0) {
318           System.arraycopy(bytes.bytes, offset, buffer, byteUpto, bytesToCopy);
319           offset += bytesToCopy;
320           length -= bytesToCopy;
321         }
322         nextBuffer();
323         overflow = overflow - BYTE_BLOCK_SIZE;
324       }
325     }  while(true);
326   }
327   
328   /**
329    * Reads bytes bytes out of the pool starting at the given offset with the given  
330    * length into the given byte array at offset <tt>off</tt>.
331    * <p>Note: this method allows to copy across block boundaries.</p>
332    */
333   public void readBytes(final long offset, final byte bytes[], final int off, final int length) {
334     if (length == 0) {
335       return;
336     }
337     int bytesOffset = off;
338     int bytesLength = length;
339     int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
340     byte[] buffer = buffers[bufferIndex];
341     int pos = (int) (offset & BYTE_BLOCK_MASK);
342     int overflow = (pos + length) - BYTE_BLOCK_SIZE;
343     do {
344       if (overflow <= 0) {
345         System.arraycopy(buffer, pos, bytes, bytesOffset, bytesLength);
346         break;
347       } else {
348         final int bytesToCopy = length - overflow;
349         System.arraycopy(buffer, pos, bytes, bytesOffset, bytesToCopy);
350         pos = 0;
351         bytesLength -= bytesToCopy;
352         bytesOffset += bytesToCopy;
353         buffer = buffers[++bufferIndex];
354         overflow = overflow - BYTE_BLOCK_SIZE;
355       }
356     } while (true);
357   }
358 }
359